tripartite 책 코드

Author

김보람

Published

January 27, 2024

import

import pandas as pd
import os
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.model_selection import train_test_split
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder

# sklearn
import sklearn
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score

    
# Load the raw transaction table (the path is specific to the author's machine).
df = pd.read_csv("~/Desktop/fraudTrain.csv")
# Keep a reproducible 20% sample of the legitimate rows plus every fraud row.
# pd.concat is used because DataFrame.append is deprecated and was removed in
# pandas 2.0; the row order (sampled legitimate rows first, then fraud rows)
# is the same as with append.
df = pd.concat([
    df[df["is_fraud"] == 0].sample(frac=0.20, random_state=42),
    df[df["is_fraud"] == 1],
])
df.head()
/tmp/ipykernel_3417592/1991497680.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  df = df[df["is_fraud"]==0].sample(frac=0.20, random_state=42).append(df[df["is_fraud"] == 1])
Unnamed: 0 trans_date_trans_time cc_num merchant category amt first last gender street ... lat long city_pop job dob trans_num unix_time merch_lat merch_long is_fraud
669418 669418 2019-10-12 18:21 4.089100e+18 fraud_Haley, Jewess and Bechtelar shopping_pos 7.53 Debra Stark F 686 Linda Rest ... 32.3836 -94.8653 24536 Multimedia programmer 1983-10-14 d313353fa30233e5fab5468e852d22fc 1350066071 32.202008 -94.371865 0
32567 32567 2019-01-20 13:06 4.247920e+12 fraud_Turner LLC travel 3.79 Judith Moss F 46297 Benjamin Plains Suite 703 ... 39.5370 -83.4550 22305 Television floor manager 1939-03-09 88c65b4e1585934d578511e627fe3589 1327064760 39.156673 -82.930503 0
156587 156587 2019-03-24 18:09 4.026220e+12 fraud_Klein Group entertainment 59.07 Debbie Payne F 204 Ashley Neck Apt. 169 ... 41.5224 -71.9934 4720 Broadcast presenter 1977-05-18 3bd9ede04b5c093143d5e5292940b670 1332612553 41.657152 -72.595751 0
1020243 1020243 2020-02-25 15:12 4.957920e+12 fraud_Monahan-Morar personal_care 25.58 Alan Parsons M 0547 Russell Ford Suite 574 ... 39.6171 -102.4776 207 Network engineer 1955-12-04 19e16ee7a01d229e750359098365e321 1361805120 39.080346 -103.213452 0
116272 116272 2019-03-06 23:19 4.178100e+15 fraud_Kozey-Kuhlman personal_care 84.96 Jill Flores F 639 Cruz Islands ... 41.9488 -86.4913 3104 Horticulturist, commercial 1981-03-29 a0c8641ca1f5d6e243ed5a2246e66176 1331075954 42.502065 -86.732664 0

5 rows × 23 columns

#     def throw(df, fraud_rate):  # 사기 거래 비율에 맞춰 버려지는 함수!
#         df1 = df[df['is_fraud'] == 1].copy()
#         df0 = df[df['is_fraud'] == 0].copy()
#         df0_downsample = (len(df1) * (1-fraud_rate)) / (len(df0) * fraud_rate)
#         df0_down = df0.sample(frac=df0_downsample, random_state=42)
#         df_p = pd.concat([df1, df0_down])
#         return df_p
    
#     def split_dataframe(data_frame, test_fraud_rate, test_rate=0.3):
#         n = len(data_frame)
    
#         # 사기 거래와 정상 거래를 분리
#         fraud_data = data_frame[data_frame['is_fraud'] == 1]
#         normal_data = data_frame[data_frame['is_fraud'] == 0]

#         # 테스트 데이터 크기 계산
#         test_samples = int(test_fraud_rate * (n * test_rate))
#         remaining_test_samples = int(n * test_rate) - test_samples
    
#         # 사기 거래 및 정상 거래에서 무작위로 테스트 데이터 추출
#         test_fraud_data = fraud_data.sample(n=test_samples, replace=False)
#         test_normal_data = normal_data.sample(n=remaining_test_samples, replace=False)

#         # 테스트 데이터 합치기
#         test_data = pd.concat([test_normal_data, test_fraud_data])

#         # 훈련 데이터 생성
#         train_data = data_frame[~data_frame.index.isin(test_data.index)]

#         return train_data, test_data
    
#     def concat(df_tr, df_tst):   
#         df = pd.concat([df_tr, df_tst])
#         train_mask = np.concatenate((np.full(len(df_tr), True), np.full(len(df_tst), False)))    # index꼬이는거 방지하기 위해서? ★ (이거,, 훔,,?(
#         test_mask =  np.concatenate((np.full(len(df_tr), False), np.full(len(df_tst), True))) 
#         mask = (train_mask, test_mask)
#         return df, mask
        
def evaluation(y, yhat):
    """Score predictions ``yhat`` against the true labels ``y``.

    Args:
        y: True labels.
        yhat: Predicted values, passed unchanged as the second argument of
            every metric.

    Returns:
        A one-row ``pandas.DataFrame`` with one column per metric, named
        after the scikit-learn function (``accuracy_score``,
        ``precision_score``, ``recall_score``, ``f1_score``,
        ``roc_auc_score``), each value rounded to 6 decimals.
    """
    # Called ``scorers`` so the name does not shadow the ``metrics`` module
    # that the file imports from sklearn.
    scorers = [sklearn.metrics.accuracy_score,
               sklearn.metrics.precision_score,
               sklearn.metrics.recall_score,
               sklearn.metrics.f1_score,
               sklearn.metrics.roc_auc_score]
    # NOTE(review): roc_auc_score receives the same ``yhat`` as the other
    # metrics; if callers pass hard class labels rather than scores, the AUC
    # is computed from a single threshold -- confirm this is intended.
    #
    # Built-in round() works for NumPy scalars and for plain Python floats,
    # whereas the ``.round`` method only exists on NumPy scalars.
    return pd.DataFrame({m.__name__: [round(m(y, yhat), 6)] for m in scorers})
        
#     def compute_time_difference(group):
#         n = len(group)
#         result = []
#         for i in range(n):
#             for j in range(n):
#                 time_difference = abs((group.iloc[i].trans_date_trans_time - group.iloc[j].trans_date_trans_time).total_seconds())
#                 result.append([group.iloc[i].name, group.iloc[j].name, time_difference])
#         return result

#     def edge_index_save(df, unique_col, theta, gamma):
#         groups = df.groupby(unique_col)
#         edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
#         edge_index = edge_index.astype(np.float64)
#         filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
        
#         while os.path.exists(filename):
#             self.save_attempt += 1
#             filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
#         np.save(filename, edge_index)
#         #tetha = edge_index_plust_itme[:,].mean()
    
        
#         edge_index[:,2] = (np.exp(-edge_index[:,2]/(theta)) != 1)*(np.exp(-edge_index[:,2]/(theta))).tolist()
#         edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
#         return edge_index
    
#     def edge_index(df, unique_col, theta, gamma):
#         groups = df.groupby(unique_col)
#         edge_index = np.array([item for sublist in (compute_time_difference(group) for _, group in groups) for item in sublist])
#         edge_index = edge_index.astype(np.float64)
#        # filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
        
#         # while os.path.exists(filename):
#         #     self.save_attempt += 1
#         #     filename = f"edge_index_attempt{self.save_attempt}_{str(unique_col).replace(' ', '').replace('_', '')}.npy"
#         # np.save(filename, edge_index)
#         #tetha = edge_index_plust_itme[:,].mean()
    
        
#         edge_index[:,2] = (np.exp(-edge_index[:,2]/(theta)) != 1)*(np.exp(-edge_index[:,2]/(theta))).tolist()
#         edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
#         return edge_index

삼분그래프

def build_graph_tripartite(df_input, graph_type=None):
    """Build a card -- transaction -- merchant graph from a transaction table.

    Every row of ``df_input`` becomes a transaction node (keyed by the row's
    index label) connected to the node of its ``cc_num`` and to the node of
    its ``merchant``. Both edges of a row carry the attributes ``label``
    (the row's ``is_fraud``) and ``weight`` (the row's ``amt``).

    Args:
        df_input: DataFrame with the columns ``cc_num``, ``merchant``,
            ``is_fraud`` and ``amt``. It is not modified.
        graph_type: Value forwarded to ``nx.from_edgelist`` as
            ``create_using`` (a networkx graph class or instance). ``None``
            (the default) builds a fresh ``nx.Graph`` on every call; the
            previous default was a single ``nx.Graph()`` instance created at
            definition time, so successive default calls cleared and reused
            the same graph object.

    Returns:
        The populated graph; nodes are integer ids.
    """
    if graph_type is None:
        graph_type = nx.Graph()

    txn_keys = df_input.index.tolist()
    card_keys = df_input["cc_num"].tolist()
    merchant_keys = df_input["merchant"].tolist()

    # One integer id per distinct raw value across the three node kinds.
    # NOTE(review): ids are keyed by raw value only, so an index label that
    # equals a card number or merchant value would share a node, and rows
    # with the same index label share one transaction node -- confirm the
    # caller passes a unique index.
    mapping = {key: node_id
               for node_id, key in enumerate(set(txn_keys + card_keys + merchant_keys))}

    # Column-wise iteration keeps each value's own type; iterrows() would
    # upcast a purely numeric frame to float and turn node ids into floats.
    attrs = [{"label": fraud, "weight": amount}
             for fraud, amount in zip(df_input["is_fraud"].tolist(),
                                      df_input["amt"].tolist())]

    # Same insertion order as before: all card edges, then all merchant edges,
    # so the order of G.edges is unchanged for downstream index-based splits.
    card_edges = [(mapping[card], mapping[txn], attr)
                  for card, txn, attr in zip(card_keys, txn_keys, attrs)]
    merchant_edges = [(mapping[merchant], mapping[txn], attr)
                      for merchant, txn, attr in zip(merchant_keys, txn_keys, attrs)]

    # add_edges_from (used by from_edgelist) copies each attribute dict into
    # the edge, so sharing ``attr`` between a row's two edges is safe.
    G = nx.from_edgelist(card_edges + merchant_edges, create_using=graph_type)
    return G
    

지도학습

from sklearn.utils import resample

# Split the transactions by class.
df_majority = df[df.is_fraud==0]
df_minority = df[df.is_fraud==1]

# Downsample the legitimate class to the size of the fraud class.
# replace=False is required: resample() samples WITH replacement by default,
# which yields duplicated rows. Duplicates share an index label, and
# build_graph_tripartite keys transaction nodes by index label, so duplicated
# rows would silently collapse into a single transaction node.
df_maj_dowsampled = resample(df_majority,
                             replace=False,
                             n_samples=len(df_minority),
                             random_state=42)

# Balanced table: every fraud row plus an equally sized legitimate sample.
df_downsampled = pd.concat([df_minority, df_maj_dowsampled])

print(df_downsampled.is_fraud.value_counts())
G_down = build_graph_tripartite(df_downsampled)
1    6006
0    6006
Name: is_fraud, dtype: int64
from sklearn.model_selection import train_test_split
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder

# Freeze the edge order once; the split below works on positions in this list.
edgs = list(G_down.edges)
edge_positions = list(range(len(edgs)))
edge_labels = list(nx.get_edge_attributes(G_down, "label").values())

# 80/20 split of edge positions together with their fraud labels.
train_edges, test_edges, train_labels, test_labels = train_test_split(
    edge_positions,
    edge_labels,
    test_size=0.20,
    random_state=42,
)

# The training graph holds only the training edges; nodes that lost all of
# their edges are added back so every node of G_down is still present.
train_graph = G_down.edge_subgraph([edgs[pos] for pos in train_edges]).copy()
dropped_nodes = set(G_down.nodes) - set(train_graph.nodes)
train_graph.add_nodes_from(list(dropped_nodes))

# Fit node2vec on the training graph, reading edge weights from 'weight'.
node2vec_train = Node2Vec(train_graph, weight_key='weight')
model_train = node2vec_train.fit(window=10)
Generating walks (CPU: 1): 100%|██████████| 10/10 [00:25<00:00,  2.55s/it]
# Edge-embedding operators to apply; only HadamardEmbedder is enabled here
# (AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder are the others).
classes = [HadamardEmbedder]
for cl in classes:
    # Each pass rebinds embeddings_train, so the last operator listed wins.
    embeddings_train = cl(keyed_vectors=model_train.wv)

# One embedding per edge; the two endpoint ids are converted to str for lookup.
train_embeddings = [
    embeddings_train[tuple(map(str, edgs[pos][:2]))] for pos in train_edges
]
test_embeddings = [
    embeddings_train[tuple(map(str, edgs[pos][:2]))] for pos in test_edges
]

# Feature table: one X_<i> column per embedding dimension.
columns = [f'X_{i}' for i in range(np.asarray(train_embeddings).shape[1])]
df_data = pd.DataFrame(train_embeddings, columns=columns)

# Label table, row-aligned with the feature table.
df_labels = pd.DataFrame({'label': train_labels})

# Side-by-side combination of features and label.
df = df_data.join(df_labels)

label = np.asarray(train_labels)
predictr = TabularPredictor(label='label')
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073147/"
# Train the AutoGluon predictor on the training table `df` (its 'label' column is the target).
predictr.fit(df) 
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073147/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   600.47 GB / 982.82 GB (61.1%)
Train Data Rows:    19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [0, 1]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    18703.56 MB
    Train Data (Original)  Memory Usage: 9.76 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Types of features in original data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    Types of features in processed data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    0.2s = Fit runtime
    128 features in original data used to generate 128 features in processed data.
    Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.21s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5074790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.7677   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: KNeighborsDist ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1822f20d0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.7666   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: LightGBMXT ...
    0.7635   = Validation score   (accuracy)
    2.77s    = Training   runtime
    0.02s    = Validation runtime
Fitting model: LightGBM ...
    0.7368   = Validation score   (accuracy)
    2.84s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: RandomForestGini ...
    0.7467   = Validation score   (accuracy)
    4.3s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: RandomForestEntr ...
    0.7488   = Validation score   (accuracy)
    6.1s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: CatBoost ...
    0.7556   = Validation score   (accuracy)
    9.29s    = Training   runtime
    0.0s     = Validation runtime
Fitting model: ExtraTreesGini ...
    0.763    = Validation score   (accuracy)
    0.72s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: ExtraTreesEntr ...
    0.7609   = Validation score   (accuracy)
    0.75s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: NeuralNetFastAI ...
    0.7892   = Validation score   (accuracy)
    11.87s   = Training   runtime
    0.02s    = Validation runtime
Fitting model: XGBoost ...
    0.7593   = Validation score   (accuracy)
    10.34s   = Training   runtime
    0.03s    = Validation runtime
Fitting model: NeuralNetTorch ...
    0.7383   = Validation score   (accuracy)
    16.03s   = Training   runtime
    0.09s    = Validation runtime
Fitting model: LightGBMLarge ...
    0.7646   = Validation score   (accuracy)
    11.8s    = Training   runtime
    0.03s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.8076   = Validation score   (accuracy)
    0.81s    = Training   runtime
    0.0s     = Validation runtime
AutoGluon training complete, total runtime = 79.69s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073147/")
[1000]  valid_set's binary_error: 0.24279
<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7fa1ed618dc0>
# Stack the held-out edge embeddings into a 2-D array and keep their labels.
test = np.array(test_embeddings)
y = np.asarray(test_labels)

# Same X_<i> column naming as the training table.
columns = [f'X_{i}' for i in range(test.shape[1])]
test_df = pd.DataFrame(test, columns=columns)

# Predictions of the fitted predictor for the held-out edges.
yhat = predictr.predict(test_df)
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5061820>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1b7d08e50>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
# Display the one-row metric table comparing held-out labels `y` with predictions `yhat`.
evaluation(y,yhat)
accuracy_score precision_score recall_score f1_score roc_auc_score
0 0.498427 0.7 0.005836 0.011575 0.501651

# Edge-embedding operators to apply; only HadamardEmbedder is enabled here
# (AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder are the others).
classes = [HadamardEmbedder]
for cl in classes:
    # Each pass rebinds embeddings_train, so the last operator listed wins.
    embeddings_train = cl(keyed_vectors=model_train.wv)

# One embedding per edge; the two endpoint ids are converted to str for lookup.
train_embeddings = [
    embeddings_train[tuple(map(str, edgs[pos][:2]))] for pos in train_edges
]
test_embeddings = [
    embeddings_train[tuple(map(str, edgs[pos][:2]))] for pos in test_edges
]

# Feature table: one X_<i> column per embedding dimension.
columns = [f'X_{i}' for i in range(np.asarray(train_embeddings).shape[1])]
df_data = pd.DataFrame(train_embeddings, columns=columns)

# Label table, row-aligned with the feature table.
df_labels = pd.DataFrame({'label': train_labels})

# Side-by-side combination of features and label.
df = df_data.join(df_labels)

label = np.asarray(train_labels)
predictr = TabularPredictor(label='label')
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073147/"
# Train the AutoGluon predictor on the training table `df` (its 'label' column is the target).
predictr.fit(df) 
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073147/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   600.47 GB / 982.82 GB (61.1%)
Train Data Rows:    19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [0, 1]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    18703.56 MB
    Train Data (Original)  Memory Usage: 9.76 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Types of features in original data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    Types of features in processed data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    0.2s = Fit runtime
    128 features in original data used to generate 128 features in processed data.
    Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.21s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5074790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.7677   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: KNeighborsDist ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1822f20d0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.7666   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: LightGBMXT ...
    0.7635   = Validation score   (accuracy)
    2.77s    = Training   runtime
    0.02s    = Validation runtime
Fitting model: LightGBM ...
    0.7368   = Validation score   (accuracy)
    2.84s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: RandomForestGini ...
    0.7467   = Validation score   (accuracy)
    4.3s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: RandomForestEntr ...
    0.7488   = Validation score   (accuracy)
    6.1s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: CatBoost ...
    0.7556   = Validation score   (accuracy)
    9.29s    = Training   runtime
    0.0s     = Validation runtime
Fitting model: ExtraTreesGini ...
    0.763    = Validation score   (accuracy)
    0.72s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: ExtraTreesEntr ...
    0.7609   = Validation score   (accuracy)
    0.75s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: NeuralNetFastAI ...
    0.7892   = Validation score   (accuracy)
    11.87s   = Training   runtime
    0.02s    = Validation runtime
Fitting model: XGBoost ...
    0.7593   = Validation score   (accuracy)
    10.34s   = Training   runtime
    0.03s    = Validation runtime
Fitting model: NeuralNetTorch ...
    0.7383   = Validation score   (accuracy)
    16.03s   = Training   runtime
    0.09s    = Validation runtime
Fitting model: LightGBMLarge ...
    0.7646   = Validation score   (accuracy)
    11.8s    = Training   runtime
    0.03s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.8076   = Validation score   (accuracy)
    0.81s    = Training   runtime
    0.0s     = Validation runtime
AutoGluon training complete, total runtime = 79.69s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073147/")
[1000]  valid_set's binary_error: 0.24279
<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7fa1ed618dc0>
# Stack the held-out edge embeddings into a 2-D array and keep their labels.
test = np.array(test_embeddings)
y = np.asarray(test_labels)

# Same X_<i> column naming as the training table.
columns = [f'X_{i}' for i in range(test.shape[1])]
test_df = pd.DataFrame(test, columns=columns)

# Predictions of the fitted predictor for the held-out edges.
yhat = predictr.predict(test_df)
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5061820>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1b7d08e50>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
# Display the one-row metric table comparing held-out labels `y` with predictions `yhat`.
evaluation(y,yhat)
accuracy_score precision_score recall_score f1_score roc_auc_score
0 0.498427 0.7 0.005836 0.011575 0.501651

# Edge-embedding operators to apply; only AverageEmbedder is enabled here
# (HadamardEmbedder, WeightedL1Embedder, WeightedL2Embedder are the others).
classes = [AverageEmbedder]
for cl in classes:
    # Each pass rebinds embeddings_train, so the last operator listed wins.
    embeddings_train = cl(keyed_vectors=model_train.wv)

# One embedding per edge; the two endpoint ids are converted to str for lookup.
train_embeddings = [
    embeddings_train[tuple(map(str, edgs[pos][:2]))] for pos in train_edges
]
test_embeddings = [
    embeddings_train[tuple(map(str, edgs[pos][:2]))] for pos in test_edges
]

# Training table: X_<i> feature columns joined with the 'label' column.
columns = [f'X_{i}' for i in range(np.asarray(train_embeddings).shape[1])]
df_data = pd.DataFrame(train_embeddings, columns=columns)
df_labels = pd.DataFrame({'label': train_labels})
df = df_data.join(df_labels)

label = np.asarray(train_labels)

# Create and train the predictor on the training table.
predictr = TabularPredictor(label='label')
predictr.fit(df)

# Held-out table with the same column naming, plus its true labels.
test = np.array(test_embeddings)
columns = [f'X_{i}' for i in range(test.shape[1])]
test_df = pd.DataFrame(test, columns=columns)
y = np.asarray(test_labels)

# Predict the held-out edges and display the metric table.
yhat = predictr.predict(test_df)

evaluation(y,yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073500/"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073500/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   599.89 GB / 982.82 GB (61.0%)
Train Data Rows:    19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [0, 1]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    18646.15 MB
    Train Data (Original)  Memory Usage: 9.76 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Types of features in original data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    Types of features in processed data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    0.1s = Fit runtime
    128 features in original data used to generate 128 features in processed data.
    Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.16s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5108af0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.7252   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: KNeighborsDist ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5108af0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.7257   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.31s    = Validation runtime
Fitting model: LightGBMXT ...
    0.7997   = Validation score   (accuracy)
    13.14s   = Training   runtime
    0.07s    = Validation runtime
Fitting model: LightGBM ...
    0.7829   = Validation score   (accuracy)
    4.43s    = Training   runtime
    0.02s    = Validation runtime
Fitting model: RandomForestGini ...
    0.7425   = Validation score   (accuracy)
    3.34s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: RandomForestEntr ...
    0.7488   = Validation score   (accuracy)
    4.8s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: CatBoost ...
    0.7944   = Validation score   (accuracy)
    65.85s   = Training   runtime
    0.01s    = Validation runtime
Fitting model: ExtraTreesGini ...
    0.7493   = Validation score   (accuracy)
    0.7s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: ExtraTreesEntr ...
    0.7472   = Validation score   (accuracy)
    0.72s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: NeuralNetFastAI ...
    0.8464   = Validation score   (accuracy)
    9.77s    = Training   runtime
    0.02s    = Validation runtime
Fitting model: XGBoost ...
    0.7908   = Validation score   (accuracy)
    7.2s     = Training   runtime
    0.02s    = Validation runtime
Fitting model: NeuralNetTorch ...
    0.8311   = Validation score   (accuracy)
    19.99s   = Training   runtime
    0.01s    = Validation runtime
Fitting model: LightGBMLarge ...
    0.7834   = Validation score   (accuracy)
    8.34s    = Training   runtime
    0.02s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.8542   = Validation score   (accuracy)
    0.72s    = Training   runtime
    0.0s     = Validation runtime
AutoGluon training complete, total runtime = 141.06s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073500/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5074820>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5074790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
[1000]  valid_set's binary_error: 0.227583
[2000]  valid_set's binary_error: 0.214997
[3000]  valid_set's binary_error: 0.209229
[4000]  valid_set's binary_error: 0.203461
[5000]  valid_set's binary_error: 0.201888
[6000]  valid_set's binary_error: 0.203985
[1000]  valid_set's binary_error: 0.226534
accuracy_score precision_score recall_score f1_score roc_auc_score
0 0.591777 0.82311 0.240517 0.372258 0.594076
# Same edge-classification pipeline as the other embedder runs, this time
# using node2vec's WeightedL1Embedder to combine the two endpoint vectors.
classes = [WeightedL1Embedder]
for cl in classes:
    embeddings_train = cl(keyed_vectors=model_train.wv)


def _edge_key(idx):
    """Return the two endpoint ids of edge ``idx`` as a pair of strings."""
    # The embedder is indexed with string node ids, hence the str() casts.
    return str(edgs[idx][0]), str(edgs[idx][1])


# One embedding vector per edge, for the training and the test edge sets.
# Both sets are looked up in the same embedder built from model_train.
train_embeddings = [embeddings_train[_edge_key(x)] for x in train_edges]
test_embeddings = [embeddings_train[_edge_key(x)] for x in test_edges]

# Build the training frame: feature columns X_0 .. X_{d-1} plus 'label'.
n_features = np.array(train_embeddings).shape[1]
columns = [f'X_{i}' for i in range(n_features)]
df_data = pd.DataFrame(data=train_embeddings, columns=columns)
df_labels = pd.DataFrame(data=train_labels, columns=['label'])

# Join features and labels side by side.
df = pd.concat([df_data, df_labels], axis=1)

label = np.array(train_labels)

# fit() returns the predictor itself, so construction and training are chained.
predictr = TabularPredictor(label='label').fit(df)

# Build the test frame with the same X_i column naming scheme.
test = np.array(test_embeddings)
columns = [f'X_{i}' for i in range(test.shape[1])]
test_df = pd.DataFrame(test, columns=columns)

yhat = predictr.predict(test_df)
y = np.array(test_labels)

# Left as the final bare expression so the notebook displays the result.
evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240127_073726/"
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240127_073726/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   599.32 GB / 982.82 GB (61.0%)
Train Data Rows:    19067
Train Data Columns: 128
Label Column: label
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [0, 1]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    18658.69 MB
    Train Data (Original)  Memory Usage: 9.76 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Types of features in original data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    Types of features in processed data (raw dtype, special dtypes):
        ('float', []) : 128 | ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', ...]
    0.3s = Fit runtime
    128 features in original data used to generate 128 features in processed data.
    Train Data (Processed) Memory Usage: 9.76 MB (0.1% of available memory)
Data preprocessing and feature engineering runtime = 0.28s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 17160, Val Rows: 1907
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1b7d08dc0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.5501   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: KNeighborsDist ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1b7d08dc0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.5506   = Validation score   (accuracy)
    0.05s    = Training   runtime
    0.3s     = Validation runtime
Fitting model: LightGBMXT ...
    0.646    = Validation score   (accuracy)
    1.16s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: LightGBM ...
    0.6403   = Validation score   (accuracy)
    1.03s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: RandomForestGini ...
    0.635    = Validation score   (accuracy)
    3.6s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: RandomForestEntr ...
    0.6392   = Validation score   (accuracy)
    5.08s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: CatBoost ...
    0.656    = Validation score   (accuracy)
    4.77s    = Training   runtime
    0.0s     = Validation runtime
Fitting model: ExtraTreesGini ...
    0.6329   = Validation score   (accuracy)
    0.69s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: ExtraTreesEntr ...
    0.6382   = Validation score   (accuracy)
    0.73s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 3: early stopping
    0.6361   = Validation score   (accuracy)
    9.47s    = Training   runtime
    0.02s    = Validation runtime
Fitting model: XGBoost ...
    0.6319   = Validation score   (accuracy)
    2.19s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: NeuralNetTorch ...
    0.6518   = Validation score   (accuracy)
    5.81s    = Training   runtime
    0.08s    = Validation runtime
Fitting model: LightGBMLarge ...
    0.6408   = Validation score   (accuracy)
    3.64s    = Training   runtime
    0.01s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.6686   = Validation score   (accuracy)
    0.73s    = Training   runtime
    0.0s     = Validation runtime
AutoGluon training complete, total runtime = 40.83s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240127_073726/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1822f2790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa1f5074790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
accuracy_score precision_score recall_score f1_score roc_auc_score
0 0.496119 0.0 0.0 0.0 0.499367